In [1]:
#Importing library and initiating API client

import pandas as pd                                                                                        ##Data manipulation and analysis
from googleapiclient.discovery import build                                                                ##Interacting with Google APIs
from IPython.display import JSON                                                                           #JSON to readable format

#####Data visualization
import seaborn as sns                                                                                      # Statistical data visualization
import matplotlib.pyplot as plt                                                                            # Plotting graphs and charts
import matplotlib.ticker as ticker                                                                         # Configuring tick locations and formatting for plots


# Word cloud
import nltk                                                                                                 # Natural language processing toolkit
from nltk.corpus import stopwords                                                                           # Commonly used words that are usually filtered out in text analysis
from nltk.tokenize import word_tokenize                                                                     # Tokenizing words in text
nltk.download('stopwords')                                                                                  # Download the list of stopwords
nltk.download('punkt')                                                                                      # Download the Punkt tokenizer models
from wordcloud import WordCloud                                                                             # Generating word clouds from text

# Initialize the YouTube API client
API = build('youtube', 'v3', developerKey='AIzaSyBzS2VTt1-1Wm8t0AE1M42VNqopkUmtSHg')                        #Here AIzaSyBzS2VTt1-1Wm8t0AE1M42VNqopkUmtSHg is the API key
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
In [2]:
#Fetcing data
datarequested = API.search().list(  #fetching video data
    q='health and coffee',
    part='snippet',
    maxResults=50,
    type='video',
    order='relevance'
)
dataasked = datarequested.execute()
youtubevideos = [
    {
        'title': item['snippet']['title'],
        'channelTitle': item['snippet']['channelTitle'],                                                       # Geeting video information
        'videoId': item['id']['videoId'],
        'videoUrl': f"https://www.youtube.com/watch?v={item['id']['videoId']}"
    }
    for item in dataasked['items']
]
videodetails = [video['videoId'] for video in youtubevideos]
def get_stats(API, videodetails):                                                                                 #get stats
    videodescription = []
    for i in range(0, len(videodetails), 50):
        data_asked = API.videos().list(
            part="snippet,statistics",
            id=','.join(videodetails[i:i+50])
        ).execute()        
        for video in data_asked['items']:
            details = {
                'channelTitle': video['snippet']['channelTitle'],
                'title': video['snippet']['title'],
                'tags': video['snippet'].get('tags', []),
                'viewCount': int(video['statistics'].get('viewCount', 0)),
                'likeCount': int(video['statistics'].get('likeCount', 0)),
                'commentCount': int(video['statistics'].get('commentCount', 0))
            }
            details['tagCount'] = len(details['tags']) if details['tags'] else 0
            videodescription.append(details)    
    dataframe = pd.DataFrame(videodescription)
    return dataframe
In [3]:
# Get video statistics
videostatistics = get_stats(API, videodetails)
videostatistics
Out[3]:
channelTitle title tags viewCount likeCount commentCount tagCount
0 Click On Detroit | Local 4 | WDIV How drinking coffee impacts your health [Coffee, Health, Research, Science, Wellness W... 55727 396 44 5
1 Mayo Clinic Mayo Clinic Minute: Health Benefits of Coffee [Mayo Clinic (Hospital), Health Care (Issue), ... 187930 1734 0 6
2 Good Morning America New study shows the health benefits of coffee [Ashton, Dr., Jen, benefits, coffee, health, n... 55415 630 68 10
3 WXYZ-TV Detroit | Channel 7 Healthiest Types of Coffee with Dr. Oz [Detroit, Local News, distributable] 60491 612 56 3
4 Doctor Mike The Ugly Truth About Coffee’s Effects On Your ... [doctor mike, dr mike, drmike, dr. mike, mikha... 4829479 153737 8582 35
5 Doctor Mike Hansen Why Coffee Is Killing You...Slowly [Why Coffee Is Killing You, acrylamide in coff... 256735 6319 1450 11
6 ZOE Is Coffee Healthy? | James Hoffmann and Profes... [JIMSEVEN] 951104 13962 1183 1
7 Dr. Eric Berg DC 8 Unexpected Benefits of COFFEE You've Never H... [9 unexpected benefits of coffee, benefits of ... 840822 26569 1923 25
8 Weight Loss Freak ! WHY YOU SHOULD DRINK BLACK TEA EVERY DAY: 5 AM... [Weight Loss Freak, health and wellness, healt... 490 11 0 22
9 WXYZ-TV Detroit | Channel 7 Ask Dr. Nandi: Is decaf coffee harmful to health? [Detroit, metro Detroit, 7 Action News, Ask Dr... 95945 702 91 7
10 TheHealthNerd Black Coffee Benefits: 9 Proven Health Benefit... [black coffee benefits, benefits of black coff... 866033 18180 1440 19
11 The Dr. Gundry Podcast Is It SAFE To Drink Coffee Everyday? (Shocking... [dr gundry, dr. gundry, steven gundry, gundry ... 148362 3576 415 17
12 Health 6 Ways Coffee Benefits Your Health | #DeepDive... [health, coffee, coffee health problems, coffe... 187833 2687 74 24
13 The Yoga Institute Coffee - Good or Bad? | Dr. Hansaji Yogendra [yogainstitutemumbai, firstyogaschoolintheworl... 53004 1153 20 36
14 Dr. Eric Berg DC Drink COFFEE for a Fatty Liver and Gallstones [drink this for a fatty liver and gallstones, ... 624126 20119 1892 25
15 Dr. William Li Scientific Study about the Health Benefits of ... [] 32596 1239 68 0
16 JJ Medicine Coffee: Health Benefits (ex. Reduction of Canc... [Coffee, Coffee health, Coffee cancer, Coffee ... 57082 1003 37 18
17 Dr. Livingood The health benefits of coffee. [The 9 Benefits of Coffee, coffee benefits, co... 19505 663 85 11
18 Thomas DeLauer 30 Days of NO CAFFEINE has Surprising Effects [quit caffeine, 30 days no caffeine, 30 days n... 729207 13185 1546 19
19 Healthline Decaf Coffee: Healthy or Unhealthy? [coffee, dietitian, decaf, nutrition, nutritio... 200996 3739 227 14
20 HealthNormal 11 Facts About Coffee You Had No Idea About! [Coffee Benefits, Coffee, Coffee Health Benefi... 3030343 56207 1453 16
21 motivationaldoc What Happens to Honey in Hot Teas or Coffee! ... [] 531166 28027 581 0
22 WDTNTV Lemon coffee trend: Health experts caution sid... [2newsvideo, video] 26318 57 4 2
23 motivationaldoc What Coffee Does to the Heart, Brain, & Body -... [coffee, caffeine, stimulant, high blood press... 6288328 105246 7933 39
24 KenDBerryMD Is COFFEE bad for you? The (Truth about Coffee... [is coffee bad for you, dr berry coffee, is co... 449548 26119 2592 18
25 BRIGHT SIDE 7 Facts About Coffee You Probably Didn’t Know [health care, coffee, coffee benefits, facts a... 13596855 166939 5734 18
26 WPTV News - FL Palm Beaches and Treasure Coast 'Mushroom coffee' gaining popularity for healt... [mushroom coffee, health benefits of mushroom ... 35503 278 29 5
27 motivationaldoc The Coffee Cortisol Connection...1 Thing Not T... [coffee, caffeine, stimulant, high blood press... 555836 26267 1543 39
28 KenDBerryMD What’s in Dr Berry’s Coffee? [coffee time, coffee, cafe] 593928 27002 1642 3
29 Dr. Carlos THE TRUTH ABOUT COFFEE, THIS IS WHAT COFFEE AC... [benefits of caffeine, benefits of coffee, blo... 807019 10285 644 19
30 University of California Television (UCTV) Tea or Coffee? [Health, Diet, Wellness, Holistic Health, Inte... 34654 517 23 7
31 AsapSCIENCE Your Brain On Coffee [Science, AsapSCIENCE, Coffee, Caffeine, Brain... 6442507 82144 5655 51
32 motivationaldoc The Right Time to Drink Your Coffee! Dr. Mand... [] 481626 22238 429 0
33 Healthline 5 Ways to Make Your Coffee Healthier [nutrition, dietitian, make your coffee health... 63106 1813 102 10
34 Dr. Josh Axe Is Coffee Bad for You? | Dr. Josh Axe [dr axe, draxe, dr josh axe, josh axe, doctor ... 1257057 15410 1409 14
35 SAAOL Heart Center Coffee - Is it Good for Health ? | By Dr. Bima... [Coffee, Saaol, DrBimalChhajer, BimalChhajer] 847151 18203 662 4
36 Strength India Movement - Tamil / தமிழ் காபி குடிப்பது நல்லதா? | COFFEE - Is it good o... [Strength India Movement, Dr Ashwin Vijay, art... 140021 4092 209 30
37 The Infographics Show What Happens To Your Body When You Stop Drinki... [] 1561856 30769 2787 0
38 Discovery UK Is Coffee Good For You? - How Stuff Works [Discover UK, Discovery Channel, Discovery Plu... 482299 1783 420 11
39 We R Stupid I ❤️ my daily Coffee/Tea - Is it Bad for Health?? [#werstupid, #mondaymotivation, #health, #heal... 114866 9077 161 37
40 Sean Nalewanyj Shorts STOP Drinking Coffee In The Morning 🛑 [bodybuilding, fitness, build muscle, gym, wor... 2857533 158977 2932 14
41 Bestie Health Having 1 Cup Of Coffee Every Day Can Do This T... [Having 1 Cup Of Coffee Every Day Can Do This ... 421174 5780 314 23
42 AsapSCIENCE Are You Consuming Your Coffee Correctly? [Science, AsapSCIENCE, Coffee, Caffeine, Drink... 3019907 30194 2095 37
43 TMJ4 News Does trendy 'Bulletproof Coffee' have any heal... [WTMJ-TV, local news, 4p, news] 13164 70 2 4
44 Sean Hashmi MD How does Coffee affect Kidney Disease? [coffee and kidney disease, health, coffee ben... 1941378 28526 1977 27
45 Healthy Hamesha Is Coffee Good For Your Health | कॉफ़ी के फायदे... [coffee good or bad for health, coffee peene k... 457976 13007 471 7
46 Soukaina Kanice Why You Should Quit Coffee ? - The Health Bene... [quit coffee, health benefits of quitting caff... 203422 6599 1233 17
47 WatchMojo.com Top 10 Surprising Health Benefits of Coffee [coffee, coffee health benefits, coffee health... 68124 1958 336 24
48 NutritionFacts.org Do the Health Benefits of Coffee Apply to Ever... [benefits of coffee, black coffee benefits, ef... 206751 3132 392 15
49 The Dr. Gundry Podcast Why You Should NEVER Have Milk With Your Coffe... [dr gundry, dr. gundry, steven gundry, gundry ... 275736 7180 835 28
In [4]:
# statistical analysis
In [5]:
summary = videostatistics.groupby('channelTitle').agg(
    {'viewCount': 'sum', 'title': 'count', 'tagCount': 'sum'}
).rename(columns={'viewCount': 'Total Views', 'title': 'Video Count', 'tagCount': 'Total Tags'}).reset_index()
summary_channel = summary.sort_values(by='Total Views', ascending=False)
summary_channel
Out[5]:
channelTitle Total Views Video Count Total Tags
1 BRIGHT SIDE 13596855 1 18
0 AsapSCIENCE 9462414 2 88
40 motivationaldoc 7856956 4 78
5 Doctor Mike 4829479 1 35
14 HealthNormal 3030343 1 16
23 Sean Nalewanyj Shorts 2857533 1 14
22 Sean Hashmi MD 1941378 1 27
28 The Infographics Show 1561856 1 0
8 Dr. Eric Berg DC 1464948 2 50
9 Dr. Josh Axe 1257057 1 14
18 KenDBerryMD 1043476 2 21
39 ZOE 951104 1 1
30 TheHealthNerd 866033 1 19
21 SAAOL Heart Center 847151 1 4
7 Dr. Carlos 807019 1 19
31 Thomas DeLauer 729207 1 19
4 Discovery UK 482299 1 11
16 Healthy Hamesha 457976 1 7
27 The Dr. Gundry Podcast 424098 2 45
2 Bestie Health 421174 1 23
15 Healthline 264102 2 24
6 Doctor Mike Hansen 256735 1 11
20 NutritionFacts.org 206751 1 15
24 Soukaina Kanice 203422 1 17
19 Mayo Clinic 187930 1 6
13 Health 187833 1 24
35 WXYZ-TV Detroit | Channel 7 156436 2 10
25 Strength India Movement - Tamil / தமிழ் 140021 1 30
37 We R Stupid 114866 1 37
36 WatchMojo.com 68124 1 24
17 JJ Medicine 57082 1 18
3 Click On Detroit | Local 4 | WDIV 55727 1 5
12 Good Morning America 55415 1 10
29 The Yoga Institute 53004 1 36
34 WPTV News - FL Palm Beaches and Treasure Coast 35503 1 5
32 University of California Television (UCTV) 34654 1 7
11 Dr. William Li 32596 1 0
33 WDTNTV 26318 1 2
10 Dr. Livingood 19505 1 11
26 TMJ4 News 13164 1 4
38 Weight Loss Freak ! 490 1 22
In [6]:
#Get data as csv files
summary_channel.to_csv('summary_channel.csv', index=False)
videostatistics.to_csv('videostatistics.csv', index=False)
In [7]:
#Data analysis
# Scatter plot: Number of tags vs views & Views vs. likes and comments
plt.figure(figsize=(6, 6))
sns.scatterplot(data=videostatistics, x="tagCount", y="viewCount")
plt.title('Plot stating no. of tags vs view count')
plt.xlabel('No. of Tags')
plt.ylabel('Total No. of views')
plt.show()
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
sns.scatterplot(data=videostatistics, x='tagCount', y='likeCount', ax=ax[0])
ax[0].set_title('Plot stating no. of tags vs like count')
ax[0].set_xlabel('No. of Tags')
ax[0].set_ylabel('Total No. of likes')
sns.scatterplot(data=videostatistics, x='tagCount', y='commentCount', ax=ax[1])
ax[1].set_title('Plot stating no. of tags vs comment count')
ax[1].set_xlabel('No. of Tags')
ax[1].set_ylabel('No. of comments')
plt.tight_layout()
plt.show()
No description has been provided for this image
No description has been provided for this image
In [8]:
# Hastags associated with top performing videos
videostatistics['tags_str'] = videostatistics['tags'].apply(lambda x: ', '.join(x) if x else '')
top_videos = videostatistics.sort_values('viewCount', ascending=False).head(50)
plt.figure(figsize=(12, 8))
ax = sns.barplot(x='tags_str', y='viewCount', data=top_videos)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='right')
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}K'.format(x / 1000)))
ax.set_title('Plot representing Top Performing Videos by View Count')
ax.set_xlabel('Hashtags Used in Videos')
ax.set_ylabel('Number of Views')
plt.tight_layout()
plt.show()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:6: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='right')
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2953 (\N{TAMIL LETTER U}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Matplotlib currently does not support Tamil natively.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2975 (\N{TAMIL LETTER TTA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2993 (\N{TAMIL LETTER RRA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3021 (\N{TAMIL SIGN VIRAMA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2986 (\N{TAMIL LETTER PA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2991 (\N{TAMIL LETTER YA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3007 (\N{TAMIL VOWEL SIGN I}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2970 (\N{TAMIL LETTER CA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2990 (\N{TAMIL LETTER MA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2992 (\N{TAMIL LETTER RA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3009 (\N{TAMIL VOWEL SIGN U}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2980 (\N{TAMIL LETTER TA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2997 (\N{TAMIL LETTER VA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2950 (\N{TAMIL LETTER AA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2994 (\N{TAMIL LETTER LA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3019 (\N{TAMIL VOWEL SIGN OO}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2985 (\N{TAMIL LETTER NNNA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3016 (\N{TAMIL VOWEL SIGN AI}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2965 (\N{TAMIL LETTER KA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2995 (\N{TAMIL LETTER LLA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations.
  plt.tight_layout()
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2953 (\N{TAMIL LETTER U}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Matplotlib currently does not support Tamil natively.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2975 (\N{TAMIL LETTER TTA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2993 (\N{TAMIL LETTER RRA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3021 (\N{TAMIL SIGN VIRAMA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2986 (\N{TAMIL LETTER PA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2991 (\N{TAMIL LETTER YA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3007 (\N{TAMIL VOWEL SIGN I}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2970 (\N{TAMIL LETTER CA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2990 (\N{TAMIL LETTER MA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2992 (\N{TAMIL LETTER RA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3009 (\N{TAMIL VOWEL SIGN U}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2980 (\N{TAMIL LETTER TA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2997 (\N{TAMIL LETTER VA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2950 (\N{TAMIL LETTER AA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2994 (\N{TAMIL LETTER LA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3019 (\N{TAMIL VOWEL SIGN OO}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2985 (\N{TAMIL LETTER NNNA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3016 (\N{TAMIL VOWEL SIGN AI}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2965 (\N{TAMIL LETTER KA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2995 (\N{TAMIL LETTER LLA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
No description has been provided for this image
In [9]:
# Word cloud
stop_words = set(stopwords.words('english'))
videostatistics['tags_no_stopwords'] = videostatistics['tags'].apply(
    lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in videostatistics['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
    plt.figure(figsize=(30, 20))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
wordcloud = WordCloud(
    width=2000, height=1000, random_state=1, background_color='black',
    colormap='cividis', collocations=False
).generate(wordcloudstr)
plot_cloud(wordcloud)

#for top 50 videos
data = videostatistics.sort_values(by='viewCount', ascending=False).head(50)
stop_words = set(stopwords.words('english'))
data['tags_no_stopwords'] = data['tags'].apply(
    lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in data['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
    plt.figure(figsize=(30, 20))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
wordcloud = WordCloud(
    width=2000, height=1000, random_state=1, background_color='black',
    colormap='cividis', collocations=False
).generate(wordcloudstr)

plot_cloud(wordcloud)


#Applying filters
cofe = ["coffee", "espresso", "latte", "cappuccino", "americano", "mocha", "barista", "brewing", "roasting"]
health = ["health", "healthy", "nutrition", "wellness", "fitness", "lifestyle", "diet", "exercise", "well-being"]
stop_words = set(stopwords.words('english'))  #combine all the words
stop_words.update(cofe)
stop_words.update(health)
videostatistics['tags_no_stopwords'] = videostatistics['tags'].apply(
    lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in videostatistics['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
    plt.figure(figsize=(30, 20))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
wordcloud = WordCloud(
    width=2000, height=1000, random_state=1, background_color='black',
    colormap='cividis', collocations=False
).generate(wordcloudstr)
plot_cloud(wordcloud)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image